library(patchwork)
library(knitr)
library(effects)
library(car)
library(stargazer)
library(tmap)
tmap_mode('plot')
library(sf)
sf::sf_use_s2(FALSE)
library(httr)
library(jsonlite)
library(lubridate)
library(tidyverse)Code
Beginners and expert citizen scientists prefer similar species on iNaturalist, but experts contribute on average almost a hundred times more data
We aim to identify users’ recording preferences according to their level of experience on the iNaturalist platform in Uruguay in order to understand the possible biases associated with the data.
Workflow
- Data download
- Users’ ranking
- Species traits
- Statistical analyses
Data download
Code
observations <- read_csv('data/NatUY_observations_03-05.csv',
guess_max = 140000)Users’ ranking
First we need to detect those users that are not from Uruguay and remove the data generated by them from our dataset. To do this we created a function using the iNaturalist API.
The function getObserversNumObservations() takes a list of users’ ids (user_login_list) and a location (place_id), and retrieves: the user_id, user_login, and user_name, the number of observations and species of the user on iNat and on the location (in our case, Uruguay, place_id=7259), and the date when the user created their account (user_created_at).
Code
# Retrieve per-user observation and species counts from the iNaturalist API.
#
# For every login in `user_login_list` the `/v1/observations/observers`
# endpoint is queried, restricted to `place_id`.
#
# Arguments:
#   user_login_list  character vector (or list) of iNaturalist user logins.
#   place_id         iNaturalist place id (default 7259).
#   pause_every      pause before every `pause_every`-th request.
#   pause_seconds    length of each pause, in seconds.
#
# Returns a tibble with one row per queried login and the columns user_id,
# observations_iNat, observations_NatUY, species_iNat, species_NatUY,
# user_login, user_created_at and user_name. Logins for which the API reports
# an error, or returns no results, get a row of NA values (user_login kept).
getObserversNumObservations <- function(user_login_list,
                                        place_id = 7259,
                                        pause_every = 10,
                                        pause_seconds = 10) {
  # Typed, zero-row result returned when there is nothing to query
  if (length(user_login_list) == 0) {
    return(tibble(user_id = numeric(),
                  observations_iNat = numeric(),
                  observations_NatUY = numeric(),
                  species_iNat = numeric(),
                  species_NatUY = numeric(),
                  user_login = character(),
                  user_created_at = lubridate::ymd_hms(),
                  user_name = character()))
  }

  # One slot per request; combined once at the end instead of growing a
  # tibble with rbind() on every iteration
  rows <- vector('list', length(user_login_list))

  for (i in seq_along(user_login_list)) {
    user_login <- user_login_list[[i]]

    # The API needs a delay because otherwise it gives an error.
    # Every `pause_every` requests, the code stops for `pause_seconds` seconds
    if (i %% pause_every == 0) {
      Sys.sleep(pause_seconds)
    }

    # Passing the parameters through `query` lets httr URL-encode the login
    get_json_call <- GET(url = 'https://api.inaturalist.org/v1/observations/observers',
                         query = list(user_login = user_login,
                                      place_id = place_id)) %>%
      content(as = 'text') %>%
      fromJSON(flatten = TRUE)

    results <- NULL
    if (!'error' %in% names(get_json_call)) {
      results <- as_tibble(get_json_call$results)
    }

    if (!is.null(results) && nrow(results) > 0) {
      rows[[i]] <- tibble(user_id = results$user_id,
                          observations_iNat = results$user.observations_count,
                          observations_NatUY = results$observation_count,
                          species_iNat = results$user.species_count,
                          species_NatUY = results$species_count,
                          user_login = results$user.login,
                          user_created_at = results$user.created_at,
                          user_name = results$user.name)
      cat(i, 'user:', user_login, ',',
          rows[[i]]$observations_iNat, 'observations on iNat', '\n')
    } else {
      # API error or empty result set: keep the login with NA placeholders so
      # the user is not silently lost from the output
      rows[[i]] <- tibble(user_id = NA,
                          observations_iNat = NA,
                          observations_NatUY = NA,
                          species_iNat = NA,
                          species_NatUY = NA,
                          user_login = user_login,
                          user_created_at = NA,
                          user_name = NA)
      cat('user:', user_login, '--> NOT FOUND', '\n')
    }
  }

  bind_rows(rows)
}
users_dataset <- observations %>% distinct(user_login)
observers_num_observations <- getObserversNumObservations(users_dataset$user_login)
# write_csv(observers_num_observations,
# 'data/observers_num_observations.csv')
Next, we discard visitors by identifying the Uruguayan users as those with more than 40% of their iNaturalist observations recorded in Uruguay.
Code
# Users are treated as Uruguayan when more than 40% of their iNaturalist
# observations were recorded in Uruguay. Rows whose share cannot be computed
# (NA) are removed by the final filter.
uruguayans <- observers_num_observations %>%
  mutate(
    proportion_natuy_inat = round(100 * observations_NatUY / observations_iNat, 3),
    uruguayan = ifelse(proportion_natuy_inat > 40, 'yes', 'no')
  ) %>%
  filter(uruguayan %in% 'yes')
observations_uy <- filter(observations, user_login %in% uruguayans$user_login)After discarding foreign users, we calculated the number of records uploaded (observations), the time active on the platform (taking the dates of the first and last records uploaded, activity_time) and the number of records over time (the total number of records uploaded divided by the active time, observations_by_time).
With these variables we first categorised users as expert, intermediate or beginner using the following criteria:
- Expert: Has 1,000 records or more AND has been active on the platform for at least one year (365 days) AND has a records/time ratio greater than or equal to 0.6.
- Intermediate: Does not qualify as Expert, has 50 records or more AND has been active on the platform for more than 3 months (90 days) AND has a records/time ratio greater than 0.2.
- Beginner: Any remaining user, i.e. one that meets neither the Expert nor the Intermediate criteria (for example, fewer than 50 records, 3 months or less of activity, or a records/time ratio of 0.2 or lower).
Finally, we ranked the users according to their level of experience in the platform by calculating an index, such that:
Index = \text{category\_score} \cdot \left( w_1 \cdot \text{observations}_{\text{norm}} + w_2 \cdot \text{activity\_time}_{\text{norm}} + w_3 \cdot \text{observations\_by\_time}_{\text{norm}} \right)
We gave the same weight to all the variables (w_1 = w_2 = w_3 = 1/3).
We used the category_score to account for the categories we had previously established, thus, the outcome is a ranking that has experts, intermediates, and beginners sorted within each category.
Code
# Min-max rescale a numeric vector to the [0, 1] interval.
#
# Arguments:
#   x      numeric vector.
#   na.rm  if TRUE, missing values are ignored when computing the minimum and
#          maximum (they stay NA in the result). Defaults to FALSE, in which
#          case any NA in `x` makes the whole result NA.
#
# Returns a numeric vector the same length as `x`. A constant vector maps to
# zeros rather than NaN (0 / 0).
normalise <- function(x, na.rm = FALSE) {
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  span <- hi - lo
  if (isTRUE(span == 0)) {
    # All values are equal: every element sits at the minimum
    return(x - lo)
  }
  (x - lo) / span
}
# Per-user activity summary, experience category and ranking.
#
# Steps:
#   1. summarise each user's first/last upload, number of records, active
#      time in days (inclusive, hence the + 1) and records per day;
#   2. keep users with at least 3 records and more than 3 days of activity;
#   3. assign the experience category and its score multiplier;
#   4. min-max normalise the three variables and combine them into `index`;
#   5. rank users by decreasing index.
users_dataset <- observations_uy %>%
  group_by(user_login) %>%
  summarise(
    first_record = min(created_at),
    last_record = max(created_at),
    observations = n(),
    activity_time = 1 + as.numeric(difftime(last_record, first_record, units = 'days')),
    observations_by_time = observations / activity_time
  ) %>%
  filter(observations >= 3, activity_time > 3) %>%
  mutate(
    # Anyone who is neither expert nor intermediate is a beginner
    user_category = case_when(
      observations >= 1000 & activity_time >= 365 & observations_by_time >= 0.6 ~ 'expert',
      observations >= 50 & activity_time > 90 & observations_by_time > 0.2 ~ 'intermediate',
      TRUE ~ 'beginner'
    ),
    # Multiplier that keeps the three categories apart in the final ranking
    category_score = unname(c(expert = 10, intermediate = 1, beginner = 0.01)[user_category]),
    # Normalised variables
    observations_norm = normalise(observations),
    activity_time_norm = normalise(activity_time),
    observations_by_time_norm = normalise(observations_by_time),
    # Weights for the variables.
    # NOTE(review): the accompanying text describes equal weights (1/3 each),
    # whereas these values weight the number of observations more heavily.
    # Confirm which is intended.
    w1 = 2/3, # observations
    w2 = 1/6, # activity_time
    w3 = 1/6, # observations_by_time
    # Weighted sum scaled by the category score
    index = category_score * (
      w1 * observations_norm +
        w2 * activity_time_norm +
        w3 * observations_by_time_norm
    )
  ) %>%
  arrange(desc(index)) %>%
  mutate(ranking = row_number())
# write_csv(users_dataset, 'data/users_dataset.csv')Code
# top 5 users per category
users_dataset %>%
group_by(category_score) %>%
slice_head(n = 5) %>% ungroup() %>%
select(ranking, user_category,user_login,
observations, activity_time, observations_by_time) %>%
arrange(ranking) %>%
rename(Ranking = ranking,
Category = user_category,
User = user_login,
`N of observations` = observations,
`Activity (in days)` = activity_time,
`N of observations per day` = observations_by_time) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| Ranking | Category | User | N of observations | Activity (in days) | N of observations per day |
|---|---|---|---|---|---|
| 1 | expert | santiagomailhos | 7,106 | 1,909.61 | 3.72 |
| 2 | expert | luisvescia | 5,030 | 858.86 | 5.86 |
| 3 | expert | ornitoloca | 3,554 | 1,342.43 | 2.65 |
| 4 | expert | enriquecenoz | 3,096 | 579.77 | 5.34 |
| 5 | expert | msilvera | 2,484 | 1,673.52 | 1.48 |
| 21 | intermediate | mirmeleon | 1,121 | 2,146.10 | 0.52 |
| 22 | intermediate | rafatosi | 1,143 | 1,995.26 | 0.57 |
| 23 | intermediate | gabriellaufer | 853 | 1,642.21 | 0.52 |
| 24 | intermediate | leo_lagos | 794 | 1,685.25 | 0.47 |
| 25 | intermediate | amailhos | 638 | 1,907.84 | 0.33 |
| 113 | beginner | lyn_loveless | 285 | 14.88 | 19.15 |
| 114 | beginner | intiporley | 4 | 3,165.88 | 0.00 |
| 115 | beginner | smantaras | 382 | 1,994.36 | 0.19 |
| 116 | beginner | gusper | 224 | 2,177.10 | 0.10 |
| 117 | beginner | mariusvk | 166 | 13.46 | 12.34 |
Code
# users per category
users_dataset %>%
group_by(category_score, user_category) %>%
count() %>% ungroup() %>%
arrange(category_score) %>% select(-category_score) %>%
mutate(`%` = scales::label_percent()(n / sum(n))) %>%
rename(Category= user_category,
`N of users`=n) %>%
janitor::adorn_totals() %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| Category | N of users | % |
|---|---|---|
| beginner | 824 | 88.0% |
| intermediate | 92 | 9.8% |
| expert | 20 | 2.1% |
| Total | 936 | - |
Code
# observations per category
left_join(observations_uy, users_dataset %>%
select(user_login, observations,category_score, user_category, ranking)) %>%
filter(!is.na(user_category)) %>%
group_by(category_score, user_category) %>%
count() %>% ungroup() %>%
arrange(category_score) %>% select(-category_score) %>%
mutate(`%` = scales::label_percent()(n / sum(n))) %>%
rename(Category= user_category,
`N of observations`=n) %>%
janitor::adorn_totals() %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| Category | N of observations | % |
|---|---|---|
| beginner | 21,498 | 22.2% |
| intermediate | 30,351 | 31.4% |
| expert | 44,808 | 46.4% |
| Total | 96,657 | - |
- Average number of records for a beginner = 26.1
- Average number of records for an intermediate = 329.9
- Average number of records for an expert = 2240.4
Species traits
First, we exported the list of species for tetrapods and plants.
Code
# Taxonomy of research-grade observations identified to species level.
# Counting whitespace-separated tokens ('\\S+') keeps only the records whose
# taxon_species_name has exactly two words.
species_list <- observations_uy %>%
  filter(
    quality_grade == 'research',
    str_count(taxon_species_name, '\\S+') == 2
  ) %>%
  select(taxon_kingdom_name, taxon_phylum_name, taxon_class_name,
         taxon_order_name, taxon_family_name, taxon_genus_name,
         taxon_species_name)
## tetrapods
# Number of records per species for the four tetrapod classes.
tetra <- species_list %>%
  filter(taxon_class_name %in% c('Aves', 'Amphibia', 'Mammalia', 'Reptilia')) %>%
  group_by(taxon_class_name, taxon_species_name) %>%
  count()
# write_csv(tetra,'data/tetra_list.csv')
## plants
# Number of records per species for the four selected plant families.
dico <- species_list %>%
  filter(taxon_family_name %in% c('Fabaceae', 'Cactaceae',
                                  'Asteraceae', 'Solanaceae')) %>%
  group_by(taxon_family_name, taxon_species_name) %>%
  count()
# write_csv(dico,'data/dico_list.csv')
Then, using the list of species, we conducted a literature search, at the national level, to identify the following traits for each species: distribution area, conservation status and body size or growth form depending on whether it was an animal or a plant.
Code
tetrapods_traits <- read_csv('data/tetrapods_traits.csv')
plants_traits <- read_csv('data/plants_traits.csv')We identified 33 tetrapod species and 52 plant species listed as non-native or domestic/cultivated (e.g. dog, horse, tobacco) in Uruguay, and discarded them from the analyses.
Code
plants_traits %>%
filter(grepl('non', remarks)) %>%
distinct(taxon_family_name, taxon_species_name) %>%
arrange(taxon_family_name, taxon_species_name) %>%
kableExtra::kbl(booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position')) %>%
kableExtra::scroll_box(height = '300px')| taxon_family_name | taxon_species_name |
|---|---|
| Asteraceae | Bidens aurea |
| Asteraceae | Calendula officinalis |
| Asteraceae | Carduus nutans |
| Asteraceae | Carduus tenuiflorus |
| Asteraceae | Cladanthus mixtus |
| Asteraceae | Cosmos bipinnatus |
| Asteraceae | Cosmos sulphureus |
| Asteraceae | Cotula nigellifolia |
| Asteraceae | Dimorphotheca ecklonis |
| Asteraceae | Dittrichia viscosa |
| Asteraceae | Euryops chrysanthemoides |
| Asteraceae | Gazania rigens |
| Asteraceae | Helenium amarum |
| Asteraceae | Helianthus annuus |
| Asteraceae | Hypochaeris glabra |
| Asteraceae | Lactuca sativa |
| Asteraceae | Pseudogynoxys chenopodioides |
| Asteraceae | Senecio angulatus |
| Asteraceae | Senecio tamoides |
| Asteraceae | Tanacetum parthenium |
| Asteraceae | Taraxacum erythrospermum |
| Asteraceae | Tragopogon porrifolius |
| Asteraceae | Youngia japonica |
| Asteraceae | Zinnia elegans |
| Cactaceae | Austrocylindropuntia subulata |
| Cactaceae | Opuntia cochenillifera |
| Cactaceae | Opuntia ficus-indica |
| Cactaceae | Parodia fusca |
| Cactaceae | Selenicereus undatus |
| Fabaceae | Acacia elata |
| Fabaceae | Acacia mearnsii |
| Fabaceae | Acacia podalyriifolia |
| Fabaceae | Acacia retinodes |
| Fabaceae | Albizia julibrissin |
| Fabaceae | Bauhinia variegata |
| Fabaceae | Glycine max |
| Fabaceae | Lathyrus latifolius |
| Fabaceae | Lathyrus odoratus |
| Fabaceae | Senna didymobotrya |
| Fabaceae | Trifolium angustifolium |
| Fabaceae | Trifolium fragiferum |
| Fabaceae | Trifolium subterraneum |
| Fabaceae | Vachellia karroo |
| Fabaceae | Vicia villosa |
| Fabaceae | Wisteria sinensis |
| Solanaceae | Brugmansia arborea |
| Solanaceae | Nicandra physalodes |
| Solanaceae | Nicotiana tabacum |
| Solanaceae | Physalis peruviana |
| Solanaceae | Solanum lycopersicum |
| Solanaceae | Solanum tuberosum |
| Solanaceae | Streptosolen jamesonii |
Code
tetrapods_traits %>%
filter(grepl('non', remarks)) %>%
distinct(taxon_class_name, taxon_species_name) %>%
arrange(taxon_class_name, taxon_species_name) %>%
kableExtra::kbl(booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position')) %>%
kableExtra::scroll_box(height = '300px')| taxon_class_name | taxon_species_name |
|---|---|
| Amphibia | Lithobates catesbeianus |
| Aves | Agapornis personatus |
| Aves | Amazona aestiva |
| Aves | Anas platyrhynchos |
| Aves | Anser anser |
| Aves | Aratinga nenday |
| Aves | Carduelis carduelis |
| Aves | Catharus ustulatus |
| Aves | Chloris chloris |
| Aves | Columba livia |
| Aves | Corvus splendens |
| Aves | Fulmarus glacialis |
| Aves | Gallus gallus |
| Aves | Meleagris gallopavo |
| Aves | Melopsittacus undulatus |
| Aves | Motacilla alba |
| Aves | Numida meleagris |
| Aves | Nymphicus hollandicus |
| Aves | Passer domesticus |
| Aves | Pavo cristatus |
| Aves | Sturnus vulgaris |
| Mammalia | Axis axis |
| Mammalia | Canis familiaris |
| Mammalia | Dama dama |
| Mammalia | Felis catus |
| Mammalia | Lepus europaeus |
| Mammalia | Mus musculus |
| Mammalia | Oryctolagus cuniculus |
| Mammalia | Rattus norvegicus |
| Mammalia | Rattus rattus |
| Mammalia | Sus scrofa |
| Reptilia | Hemidactylus mabouia |
| Reptilia | Tarentola mauritanica |
Final datasets
Finally we combine the observations, with the users’ categorisation and the species traits, and create two tables (for tetrapods and plants), with observations as rows.
Code
# Attach each user's record count, category and ranking to their
# observations. Observations from users without a category are dropped.
observations_dataset <- observations_uy %>%
  left_join(
    users_dataset %>%
      select(user_login, observations, user_category, ranking)
  ) %>%
  filter(!is.na(user_category))
# write_csv(observations_dataset, 'data/observations_dataset.csv')And then, we classify the quantitative traits using qualitative values.
| Trait | Quantitative value | Qualitative value | Criteria for qualitative value |
|---|---|---|---|
| Distribution area | Number of departments where the species is recorded at (from a total of 19) | Narrow | Present in at most 5 departments |
| Medium | Present in 6 to 16 departments | ||
| Wide | Present in 17 or more departments | ||
| Body size (tetrapods) | Average body length of the species in centimetres | Small | Mammals < 50cm Birds < 20cm Reptiles < 50cm Amphibians < 5cm |
| Medium | Mammals >= 50cm and < 200cm Birds >= 20cm and < 50 cm Reptiles >= 50cm and < 100cm Amphibians >= 5cm and < 10cm |
||
| Large | Mammals >= 200cm Birds >= 50cm Reptiles >= 100cm Amphibians >= 10cm |
||
| Growth form (plants) |
Herb Vine Liana Subshrub Shrub Tree |
Classification according to Darwinion (Zuloaga et al., 2019) | |
| Conservation status | Least concern (LC) Near threatened (NT) Vulnerable (VU) Endangered (EN) Critically endangered (CR) Not evaluated (NE) Data deficient (DD) |
Classification according to the International Union for Conservation of Nature (IUCN, 2023) |
Tetrapods’ traits
Code
# tetrapods
# tetrapods
# Join the observations with the traits of the species not flagged 'non' in
# `remarks`, keep the records that have a distribution area, and derive the
# qualitative distribution and body-size classes. case_when() evaluates its
# clauses in order, so each clause only needs the upper bound of its class.
tetra_data <- observations_dataset %>%
  left_join(tetrapods_traits %>% filter(!grepl('non', remarks))) %>%
  filter(!is.na(distribution_area)) %>%
  mutate(
    distribution = case_when(
      distribution_area <= 5 ~ 'narrow',
      distribution_area <= 16 ~ 'medium',
      distribution_area > 16 ~ 'wide',
      is.na(distribution_area) ~ 'not assessed'
    ),
    size = case_when(
      # Mammals: < 50, 50 to < 200, >= 200
      taxon_class_name == 'Mammalia' & body_size < 50 ~ 'small',
      taxon_class_name == 'Mammalia' & body_size < 200 ~ 'medium',
      taxon_class_name == 'Mammalia' & body_size >= 200 ~ 'large',
      # Amphibians: < 5, 5 to < 10, >= 10
      taxon_class_name == 'Amphibia' & body_size < 5 ~ 'small',
      taxon_class_name == 'Amphibia' & body_size < 10 ~ 'medium',
      taxon_class_name == 'Amphibia' & body_size >= 10 ~ 'large',
      # Reptiles: < 50, 50 to < 100, >= 100
      taxon_class_name == 'Reptilia' & body_size < 50 ~ 'small',
      taxon_class_name == 'Reptilia' & body_size < 100 ~ 'medium',
      taxon_class_name == 'Reptilia' & body_size >= 100 ~ 'large',
      # Birds: < 20, 20 to < 50, >= 50
      taxon_class_name == 'Aves' & body_size < 20 ~ 'small',
      taxon_class_name == 'Aves' & body_size < 50 ~ 'medium',
      taxon_class_name == 'Aves' & body_size >= 50 ~ 'large'
    )
  )
# write_csv(tetra_data, "data/tetra_data.csv")Plants’ traits
Code
# Join the observations with the traits of the species not flagged 'non' in
# `remarks`, keep the records that have a distribution area, and derive the
# qualitative distribution class.
plants_data <- observations_dataset %>%
  left_join(plants_traits %>% filter(!grepl('non', remarks))) %>%
  filter(!is.na(distribution_area)) %>%
  mutate(
    distribution = case_when(
      distribution_area <= 5 ~ 'narrow',
      distribution_area <= 16 ~ 'medium',
      distribution_area > 16 ~ 'wide',
      is.na(distribution_area) ~ 'not assessed'
    )
  )
# write_csv(plants_data, "data/dico_data.csv")
Scale variables
Now let’s transform the data, ordering the qualitative variables and scaling the quantitative variables. Then, we filter out users with less than 3 records.
Code
# Analysis table for tetrapods: keeps users with at least 3 tetrapod records,
# turns the user category and conservation status into factors with explicit
# level order, renames the columns used downstream, and adds standardised
# and log-transformed versions of the quantitative traits.
tetrapods <- tetra_data %>%
  add_count(user_login, name = 'n_observations') %>%
  filter(n_observations >= 3) %>%
  mutate(
    user_category = factor(user_category,
                           levels = c('expert', 'intermediate', 'beginner')),
    status = factor(conservation_status,
                    levels = c('CR', 'EN', 'VU', 'NT', 'DD', 'NE', 'LC'))
  ) %>%
  select(
    ranking,
    dist_class = distribution,
    size_class = size,
    dist = distribution_area,
    size = body_size,
    status,
    taxon = taxon_class_name,
    state = place_state_name,
    user_category, user_login, taxon_species_name,
    latitude, longitude
  ) %>%
  mutate(
    # scale() returns a one-column matrix; [, 1] extracts it as a vector
    size_scaled = scale(size, center = TRUE)[, 1],
    dist_scaled = scale(dist, center = TRUE)[, 1],
    log_dist = log(dist)
  )
plants <- plants_data %>%
group_by(user_login) %>%
mutate(n_observations = n()) %>% ungroup() %>%
filter(n_observations>=3) %>%
mutate(user_category = factor(user_category,
levels = c('expert',
'intermediate',
'beginner'))) %>%
mutate(status = factor(conservation_status,
levels = c('CR','EN','VU',
'NT', 'DD', 'NE',
'LC')),
growth = factor(growth_form,
levels = c('herb', 'vine',
'liana', 'subshrub',
'shrub', 'tree'))) %>%
select(ranking, dist=distribution_area,
dist_class = distribution, growth,
status, taxon=taxon_family_name,
state=place_state_name,
user_category, user_login, taxon_species_name,
latitude, longitude) %>%
mutate(dist_scaled = scale(dist, center = TRUE)[,1],
log_dist = log(dist))Average of species-variable per user
And finish up with aggregating records per user and computing the mean and SD of the values of the two traits. For tetrapods distribution area and body size, and for plants distribution area.
Code
# One row per user: mean and standard deviation of the distribution area and
# body size of the tetrapod species they recorded, plus their ranking and
# category (taken from the user's first row).
tetrapods_per_user <- tetrapods %>%
  group_by(user_login) %>%
  summarise(
    mean_dist = mean(dist),
    mean_size = mean(size),
    sd_size = sd(size),
    sd_dist = sd(dist),
    ranking = first(ranking),
    user_category = first(user_category)
  )
# One row per user: mean and standard deviation of the distribution area of
# the plant species they recorded, plus their ranking and category (taken
# from the user's first row).
plants_per_user <- plants %>%
  group_by(user_login) %>%
  summarise(
    mean_dist = mean(dist),
    sd_dist = sd(dist),
    ranking = first(ranking),
    user_category = first(user_category)
  )
# ggplot(tetrapods_per_user, aes(x = mean_dist, y=ranking)) +
# geom_point(aes(col=user_category), alpha = 0.7,) +
# labs(x = 'mean(distribution per user)', col='') +
# ggpubr::theme_pubclean() +
# theme(legend.position = 'bottom')
#
# ggplot(tetrapods_per_user, aes(x = sd_dist, y=ranking)) +
# geom_point(aes(col=user_category), alpha = 0.7,) +
# labs(x = 'SD(distribution per user)', col='') +
# ggpubr::theme_pubclean() +
# theme(legend.position = 'bottom')
#
# ggplot(plants_per_user, aes(x = sd_dist, y=ranking)) +
# geom_point(aes(col=user_category), alpha = 0.7,) +
# labs(x = 'SD(distribution per user)', col='') +
# ggpubr::theme_pubclean() +
# theme(legend.position = 'bottom')
#
# ggplot(plants_per_user, aes(x = mean_dist, y=ranking)) +
# geom_point(aes(col=user_category), alpha = 0.7,) +
# labs(x = 'mean(distribution per user)', col='') +
# ggpubr::theme_pubclean() +
# theme(legend.position = 'bottom')
# hist(tetrapods_per_user$mean_dist,
# xlab = 'mean(distribution per user)',
# main='tetrapods')
# hist(tetrapods_per_user$sd_dist,
# xlab = 'SD(distribution per user)',
# main='tetrapods')
#
# hist(tetrapods_per_user$mean_size,
# xlab = 'mean(size per user)',
# main='tetrapods')
# hist(tetrapods_per_user$sd_size,
# xlab = 'SD(size per user)',
# main='tetrapods')
#
# hist(plants_per_user$mean_dist,
# xlab = 'mean(distribution per user)',
# main='plants')
# hist(plants_per_user$sd_dist,
# xlab = 'SD(distribution per user)',
# main='plants')Summary of the data
Code
tibble(Group = c('tetrapods', 'plants'),
Users = c(nrow(tetrapods %>%
distinct(user_login)),
nrow(plants %>%
distinct(user_login))),
Observations = c(nrow(tetrapods), nrow(plants)),
Species = c(nrow(tetrapods %>%
distinct(taxon_species_name)),
nrow(plants %>%
distinct(taxon_species_name)))) %>%
janitor::adorn_totals() %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| Group | Users | Observations | Species |
|---|---|---|---|
| tetrapods | 358 | 22,918 | 602 |
| plants | 291 | 10,821 | 530 |
| Total | 649 | 33,739 | 1,132 |
Code
tetrapods %>%
group_by(taxon) %>%
summarise(observations=n(),
species=n_distinct(taxon_species_name)) %>%
janitor::adorn_totals() %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| taxon | observations | species |
|---|---|---|
| Amphibia | 1,768 | 35 |
| Aves | 17,776 | 441 |
| Mammalia | 1,737 | 68 |
| Reptilia | 1,637 | 58 |
| Total | 22,918 | 602 |
Code
tetrapods %>%
distinct(user_login, .keep_all = T) %>%
group_by(user_category) %>%
count() %>% ungroup() %>%
mutate(`%` = scales::label_percent()(n / sum(n))) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| user_category | n | % |
|---|---|---|
| expert | 20 | 6% |
| intermediate | 80 | 22% |
| beginner | 258 | 72% |
Code
plants %>%
group_by(taxon) %>%
summarise(observations=n(),
species=n_distinct(taxon_species_name)) %>%
janitor::adorn_totals() %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| taxon | observations | species |
|---|---|---|
| Asteraceae | 6,023 | 284 |
| Cactaceae | 1,240 | 44 |
| Fabaceae | 2,230 | 143 |
| Solanaceae | 1,328 | 59 |
| Total | 10,821 | 530 |
Code
plants %>%
distinct(user_login, .keep_all = T) %>%
group_by(user_category) %>%
count() %>% ungroup() %>%
mutate(`%` = scales::label_percent()(n / sum(n))) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| user_category | n | % |
|---|---|---|
| expert | 18 | 6% |
| intermediate | 72 | 25% |
| beginner | 201 | 69% |
Tetrapods
Code
tetrapods %>%
group_by(dist_class) %>%
summarise(n = n()) %>%
mutate(freq = scales::label_percent(accuracy=1)(n/sum(n))) %>%
rename(`distribution area` = dist_class,
`observations` = n,
`%` = freq) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| distribution area | observations | % |
|---|---|---|
| medium | 2,970 | 13% |
| narrow | 1,115 | 5% |
| wide | 18,833 | 82% |
Code
tetrapods %>%
group_by(user_category, dist_class) %>%
summarise(n = n()) %>%
mutate(freq = scales::label_percent(accuracy=1)(n/sum(n))) %>%
pivot_wider(names_from ='user_category',
values_from = c('n', 'freq'))%>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| dist_class | n_expert | n_intermediate | n_beginner | freq_expert | freq_intermediate | freq_beginner |
|---|---|---|---|---|---|---|
| medium | 1,773 | 881 | 316 | 15% | 11% | 10% |
| narrow | 646 | 333 | 136 | 6% | 4% | 4% |
| wide | 9,197 | 6,909 | 2,727 | 79% | 85% | 86% |
Code
tetrapods %>%
group_by(size_class) %>%
summarise(n = n()) %>%
mutate(freq = scales::label_percent(accuracy=1)(n/sum(n)))%>%
rename(`body size` = size_class,
`observations` = n,
`%` = freq) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| body size | observations | % |
|---|---|---|
| large | 4,449 | 19% |
| medium | 9,802 | 43% |
| small | 8,667 | 38% |
Code
tetrapods %>%
group_by(user_category, size_class) %>%
summarise(n = n()) %>%
mutate(freq = scales::label_percent(accuracy=1)(n/sum(n))) %>%
pivot_wider(names_from ='user_category',
values_from = c('n', 'freq')) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| size_class | n_expert | n_intermediate | n_beginner | freq_expert | freq_intermediate | freq_beginner |
|---|---|---|---|---|---|---|
| large | 2,045 | 1,696 | 708 | 18% | 21% | 22% |
| medium | 4,903 | 3,562 | 1,337 | 42% | 44% | 42% |
| small | 4,668 | 2,865 | 1,134 | 40% | 35% | 36% |
Code
# Tetrapod observations per conservation status: count and share of total.
tetrapods %>%
  group_by(status) %>%
  summarise(n = n()) %>%
  mutate(freq = scales::label_percent(accuracy = 1)(n / sum(n))) %>%
  rename(`conservation status` = status,
         `observations` = n,
         `%` = freq) %>%
  kableExtra::kbl(format.args = list(decimal.mark = '.',
                                     big.mark = ","),
                  digits = 2,
                  booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| conservation status | observations | % |
|---|---|---|
| CR | 2 | 0% |
| EN | 103 | 0% |
| VU | 334 | 1% |
| NT | 835 | 4% |
| DD | 52 | 0% |
| NE | 901 | 4% |
| LC | 20,691 | 90% |
Code
tetrapods %>%
group_by(user_category, status) %>%
summarise(n = n()) %>%
mutate(freq = scales::label_percent(accuracy=1)(n/sum(n))) %>%
pivot_wider(names_from ='user_category',
values_from = c('n', 'freq')) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| status | n_expert | n_intermediate | n_beginner | freq_expert | freq_intermediate | freq_beginner |
|---|---|---|---|---|---|---|
| CR | 1 | NA | 1 | 0% | NA | 0% |
| EN | 70 | 29 | 4 | 1% | 0% | 0% |
| VU | 178 | 104 | 52 | 2% | 1% | 2% |
| NT | 476 | 269 | 90 | 4% | 3% | 3% |
| DD | 16 | 26 | 10 | 0% | 0% | 0% |
| NE | 319 | 394 | 188 | 3% | 5% | 6% |
| LC | 10,556 | 7,301 | 2,834 | 91% | 90% | 89% |
Plants
Code
##### PLANTS
plants %>%
group_by(dist_class) %>%
summarise(n = n()) %>%
mutate(freq = scales::label_percent(accuracy=1)(n/sum(n))) %>%
rename(`distribution area` = dist_class,
`observations` = n,
`%` = freq) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| distribution area | observations | % |
|---|---|---|
| medium | 6,324 | 58% |
| narrow | 4,283 | 40% |
| wide | 214 | 2% |
Code
plants %>%
group_by(user_category, dist_class) %>%
summarise(n = n()) %>%
mutate(freq = scales::label_percent(accuracy=1)(n/sum(n))) %>%
pivot_wider(names_from ='user_category',
values_from = c('n', 'freq')) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| dist_class | n_expert | n_intermediate | n_beginner | freq_expert | freq_intermediate | freq_beginner |
|---|---|---|---|---|---|---|
| medium | 2,886 | 2,130 | 1,308 | 58% | 60% | 57% |
| narrow | 1,968 | 1,385 | 930 | 40% | 39% | 41% |
| wide | 118 | 57 | 39 | 2% | 2% | 2% |
Code
plants %>%
group_by(growth) %>%
summarise(n = n()) %>%
mutate(freq = scales::label_percent(accuracy=1)(n/sum(n))) %>%
rename(`growth form` = growth,
`observations` = n,
`%` = freq) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| growth form | observations | % |
|---|---|---|
| herb | 6,435 | 59% |
| vine | 116 | 1% |
| liana | 75 | 1% |
| subshrub | 1,278 | 12% |
| shrub | 2,173 | 20% |
| tree | 744 | 7% |
Code
plants %>%
group_by(user_category, growth) %>%
summarise(n = n()) %>%
mutate(freq = scales::label_percent(accuracy=1)(n/sum(n))) %>%
pivot_wider(names_from ='user_category',
values_from = c('n', 'freq')) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| growth | n_expert | n_intermediate | n_beginner | freq_expert | freq_intermediate | freq_beginner |
|---|---|---|---|---|---|---|
| herb | 3,034 | 2,069 | 1,332 | 61% | 58% | 58% |
| vine | 58 | 32 | 26 | 1% | 1% | 1% |
| liana | 32 | 25 | 18 | 1% | 1% | 1% |
| subshrub | 622 | 379 | 277 | 13% | 11% | 12% |
| shrub | 955 | 757 | 461 | 19% | 21% | 20% |
| tree | 271 | 310 | 163 | 5% | 9% | 7% |
Code
plants %>%
group_by(status) %>%
summarise(n = n()) %>%
mutate(freq = scales::label_percent(accuracy=1)(n/sum(n))) %>%
rename(`conservation status` = status,
`observations` = n,
`%` = freq) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| conservation status | observations | % |
|---|---|---|
| CR | 4 | 0% |
| EN | 23 | 0% |
| VU | 259 | 2% |
| NT | 12 | 0% |
| DD | 4 | 0% |
| NE | 8,633 | 80% |
| LC | 1,886 | 17% |
Code
plants %>%
group_by(user_category, status) %>%
summarise(n = n()) %>%
mutate(freq = scales::label_percent(accuracy=1)(n/sum(n))) %>%
pivot_wider(names_from ='user_category',
values_from = c('n', 'freq')) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| status | n_expert | n_intermediate | n_beginner | freq_expert | freq_intermediate | freq_beginner |
|---|---|---|---|---|---|---|
| EN | 11 | 9 | 3 | 0% | 0% | 0% |
| VU | 161 | 61 | 37 | 3% | 2% | 2% |
| NT | 3 | 8 | 1 | 0% | 0% | 0% |
| DD | 1 | 1 | 2 | 0% | 0% | 0% |
| NE | 3,978 | 2,825 | 1,830 | 80% | 79% | 80% |
| LC | 818 | 664 | 404 | 16% | 19% | 18% |
| CR | NA | 4 | NA | NA | 0% | NA |
Plots
Per group and user category
Code
# Stack the tetrapod and plant records, fix the plotting order of the taxa
# and convert the table to point geometries in WGS84 (EPSG:4326).
data_sf <- bind_rows(tetrapods, plants) %>%
  mutate(
    taxon = factor(taxon,
                   levels = c('Amphibia', 'Aves', 'Reptilia', 'Mammalia',
                              'Asteraceae', 'Cactaceae', 'Fabaceae', 'Solanaceae'))
  ) %>%
  sf::st_as_sf(coords = c('longitude', 'latitude'), crs = 4326)
uruguay_dptos <- readRDS('data/Uruguay.rds') %>% st_transform(4326)
argentina <- rnaturalearth::ne_countries(country = 'argentina', scale=50)
brazil <- rnaturalearth::ne_countries(country = 'brazil', scale=50)
southamerica <- rnaturalearth::ne_countries(continent = 'south america', scale=50)
# Overview map with one panel per taxon: record points drawn over the
# uruguay_dptos polygons, with Argentina and Brazil as grey context.
ggplot() +
geom_sf(data=uruguay_dptos, fill='white') +
geom_sf(data=argentina, fill='grey85') +
geom_sf(data=brazil, fill='grey90') +
# tetrapod classes
geom_sf(data=data_sf %>% filter(taxon %in% c('Amphibia',
'Aves',
'Mammalia',
'Reptilia')),
aes(col=taxon), show.legend = F) +
# plant families
geom_sf(data=data_sf %>% filter(taxon %in% c('Asteraceae',
'Cactaceae',
'Fabaceae',
'Solanaceae')),
aes(col=taxon), show.legend = F) +
scale_color_brewer(palette = 'Set2') +
# fixed longitude / latitude window shared by the maps of Uruguay in this file
coord_sf(xlim = c(-59, -52.5), ylim = c(-35.5, -29.5), expand = FALSE) +
facet_wrap(~taxon, ncol = 4, ) +
theme_bw()Code
# Locator map: South America in light grey with Uruguay filled in red.
southamerica_map <-
  ggplot() +
  geom_sf(data = southamerica, colour = 'grey45', fill = 'grey95') +
  # st_union() dissolves the Uruguay polygons into a single outline
  geom_sf(data = st_union(uruguay_dptos), fill = 'red') +
  theme_void() +
  coord_sf(xlim = c(-85, -32), ylim = c(-60, 15), expand = FALSE)
# Point map of the tetrapod records (Amphibia, Aves, Mammalia, Reptilia) over
# Uruguay, with Argentina and Brazil as grey context.
tetra_map <- ggplot() +
  geom_sf(data = uruguay_dptos, fill = 'white') +
  geom_sf(data = argentina, fill = 'grey85') +
  geom_sf(data = brazil, fill = 'grey90') +
  geom_sf(data = data_sf %>%
            filter(taxon %in% c('Amphibia', 'Aves', 'Mammalia', 'Reptilia')),
          aes(col = taxon)) +
  # Colours 6 to 9 of the 'Paired' palette, one per tetrapod class. The
  # palette must be requested with n >= 9: brewer.pal(n = 8, ...) returns only
  # eight colours, so [6:9] ended in NA and the fourth class was left without
  # a proper colour.
  scale_color_manual(
    values = RColorBrewer::brewer.pal(n = 9, name = "Paired")[6:9]
  ) +
  coord_sf(xlim = c(-59, -52.5), ylim = c(-35.5, -29.5), expand = FALSE) +
  theme_bw() +
  labs(col = '', title = 'Tetrapods') +
  theme(legend.position = 'bottom')
# Point map of the plant records (Asteraceae, Cactaceae, Fabaceae,
# Solanaceae), built on the same base layers and extent as the tetrapod map.
plants_map <-
  ggplot() +
  geom_sf(data = uruguay_dptos, fill = 'white') +
  geom_sf(data = argentina, fill = 'grey85') +
  geom_sf(data = brazil, fill = 'grey90') +
  geom_sf(
    data = filter(data_sf,
                  taxon %in% c('Asteraceae', 'Cactaceae',
                               'Fabaceae', 'Solanaceae')),
    mapping = aes(col = taxon)
  ) +
  scale_color_brewer(palette = 'Paired') +
  coord_sf(xlim = c(-59, -52.5), ylim = c(-35.5, -29.5), expand = FALSE) +
  theme_bw() +
  theme(legend.position = 'bottom') +
  labs(title = 'Plants', col = '')
southamerica_map | tetra_map | plants_mapCode
# ggsave(file='figs/observations_maps.svg',
# plot=(southamerica_map | tetra_map | plants_map),
# width=12, height=5, dpi = 100)Spatial coverage (grids)
Code
# create grid
# Cells of size 0.1 (in the units of uruguay_dptos' CRS) over the Uruguay
# outline, clipped to the outline itself.
uruguay_grid <- st_make_grid(st_union(uruguay_dptos), 0.1) %>%
  st_intersection(st_union(uruguay_dptos)) %>%
  # seq_along() rather than 1:length(.): it stays empty, instead of yielding
  # c(1, 0), should the intersection return no cells
  st_sf(grid_id = seq_along(.), geometry = .) %>%
  st_make_valid() %>%
  st_cast()

# convert observations to an sf object
# Points in EPSG:4326, with user_category ordered beginner < intermediate <
# expert.
observations_dataset_sf <- observations_dataset %>%
  sf::st_as_sf(coords = c('longitude', 'latitude')) %>%
  sf::st_set_crs(4326) %>%
  mutate(user_category = factor(user_category,
                                levels = c('beginner',
                                           'intermediate',
                                           'expert')))
# Number of observations per grid cell for one user category.
#
#   wanted_category: a level of observations_dataset_sf$user_category
#                    ('beginner', 'intermediate' or 'expert').
#
# Returns uruguay_grid summarised to one row per grid_id, with the count in
# n_observations.
count_observations_per_cell <- function(wanted_category) {
  category_points <- observations_dataset_sf %>%
    filter(user_category == wanted_category)

  st_join(uruguay_grid, category_points) %>%
    group_by(grid_id) %>%
    # st_join() is a left join by default: a cell without any point is kept as
    # one all-NA row, so n() alone would report 1 for it. A cell therefore only
    # gets its row count when at least one row carries a species name.
    summarise(
      n_observations = if (n_distinct(taxon_species_name,
                                      na.rm = TRUE) != 0) n() else 0
    ) %>%
    st_cast()
}

expert_grid <- count_observations_per_cell('expert')
intermediate_grid <- count_observations_per_cell('intermediate')
beginner_grid <- count_observations_per_cell('beginner')
# Spatial coverage: percentage (0-100) of grid cells that hold at least one
# observation, computed separately for each user category.
coverage <- tibble(
  experts = sum(expert_grid$n_observations > 0) * 100 /
    nrow(expert_grid),
  intermediates = sum(intermediate_grid$n_observations > 0) * 100 /
    nrow(intermediate_grid),
  beginners = sum(beginner_grid$n_observations > 0) * 100 /
    nrow(beginner_grid)
)
# Choropleth of the number of observations per grid cell for one user category.
#
#   grid:    sf grid with an n_observations column (e.g. expert_grid)
#   palette: tmap palette name for the filled cells (e.g. 'brewer.greens')
#   title:   map title, placed outside the frame at the top right
#
# Returns a tmap object.
make_observations_map <- function(grid, palette, title) {
  # 0 -> NA, so that cells without observations fall in the NA class, which is
  # drawn in grey80 and labelled '0' in the legend
  grid_for_map <- grid %>%
    mutate(n_observations = ifelse(n_observations == 0,
                                   NA, n_observations))

  tm_graticules(alpha = 0.3) +
    tm_shape(grid_for_map) +
    tm_polygons(fill = 'n_observations', fill_alpha = 0.9,
                col = 'grey90', col_alpha = 0.2,
                fill.scale = tm_scale_intervals(n = 6,
                                                style = 'jenks',
                                                values = palette,
                                                value.na = 'grey80',
                                                label.na = '0'),
                fill.legend = tm_legend(item.space = 0, item.na.space = 0,
                                        title = 'Number of observations',
                                        reverse = TRUE, frame = FALSE)) +
    tm_shape(uruguay_dptos) +
    tm_borders(col = 'grey60', fill_alpha = 0.4) +
    tm_layout(legend.outside = TRUE, frame.lwd = 0.2, frame.r = 0) +
    # Optional coverage label (disabled), e.g. for the experts map:
    # tm_credits(paste0(round(coverage$experts, 0), '% cells covered'),
    #            position = c(0.6, 0.9), size = 1) +
    tm_title(title, position = tm_pos_out('right', 'top'))
}

experts_map <- make_observations_map(expert_grid,
                                     'brewer.greens', 'Experts')
intermediates_map <- make_observations_map(intermediate_grid,
                                           'brewer.reds', 'Intermediates')
beginners_map <- make_observations_map(beginner_grid,
                                       'brewer.blues', 'Beginners')
experts_mapCode
intermediates_mapCode
beginners_mapCode
# tmap_save(tm = experts_map, width = 8, height = 6,
# filename = 'figs/experts_grid.svg', dpi = 100)
# tmap_save(tm = intermediates_map, width = 8, height = 6,
# filename = 'figs/intermediates_grid.svg', dpi = 100)
# tmap_save(tm = beginners_map, width = 8, height = 6,
# filename = 'figs/beginners_grid.svg', dpi = 100)
# Grid-cell coverage per user category, rendered as a table.
coverage %>%
# coverage holds percentages (0-100), so each value is divided by 100 before
# scales::percent() formats it as a whole-number percentage
mutate(experts = scales::percent(experts/100, accuracy=1)) %>%
mutate(intermediates = scales::percent(intermediates/100, accuracy=1)) %>%
mutate(beginners = scales::percent(beginners/100, accuracy=1)) %>%
kableExtra::kbl(format.args = list(decimal.mark = '.',
big.mark = ","),
digits=2,
booktabs = T) %>%
kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))| experts | intermediates | beginners |
|---|---|---|
| 42% | 39% | 41% |
Traits
Code
##### TETRAPODS
# Stacked bar of one categorical trait, one panel per user category.
#
#   data:         data frame with user_category and the trait column
#   trait:        name of the trait column (string)
#   trait_levels: order of the trait classes in the stack and the legend
#   legend_title: title of the fill legend
#   y_label:      y-axis title ('' for none)
#   panel_tag:    plot title, e.g. '(a)'
#
# Returns a ggplot object.
plot_tetra_trait <- function(data, trait, trait_levels, legend_title,
                             y_label, panel_tag) {
  data %>%
    group_by(user_category, trait_class = .data[[trait]]) %>%
    count() %>%
    ggplot(aes(x = '', y = n,
               fill = factor(trait_class, levels = trait_levels))) +
    # position = 'fill' rescales every bar to 1, so the bars show proportions
    geom_bar(width = 0.5, stat = 'identity', show.legend = TRUE,
             position = 'fill') +
    # the legend title is set once, through the scale name below
    labs(x = '', y = y_label) +
    facet_grid(~ factor(user_category,
                        levels = c('beginner', 'intermediate', 'expert'))) +
    scale_fill_brewer(palette = 'OrRd', name = legend_title) +
    ggpubr::theme_pubclean() +
    theme(legend.position = 'bottom') +
    ggtitle(panel_tag)
}

# distribution
tetra_plot_distribution <- plot_tetra_trait(
  tetrapods, 'dist_class',
  trait_levels = c('narrow', 'medium', 'wide'),
  legend_title = 'distribution area',
  y_label = 'Proportion of records', panel_tag = '(a)'
)
# size
tetra_plot_size <- plot_tetra_trait(
  tetrapods, 'size_class',
  trait_levels = c('small', 'medium', 'large'),
  legend_title = 'body size',
  y_label = '', panel_tag = '(b)'
)
# conservation status
tetra_plot_status <- plot_tetra_trait(
  tetrapods, 'status',
  trait_levels = c('NE', 'DD', 'CR', 'EN', 'VU', 'NT', 'LC'),
  legend_title = 'conservation status',
  y_label = '', panel_tag = '(c)'
)
# the three panels side by side (patchwork)
tetra_plots <- tetra_plot_distribution | tetra_plot_size | tetra_plot_status
tetra_plotsCode
# ggsave(tetra_plots, dpi = 100,
# width = 18, height = 7, scale = 0.8,
# file = 'figs/Figure_4.svg')
##### PLANTS
# Stacked proportion bars of the plant traits, one panel per user category
# (beginner, intermediate, expert).

# distribution
plants_plot_distribution <- plants %>%
  count(user_category, dist_class) %>%
  mutate(dist_class = factor(dist_class,
                             levels = c('narrow', 'medium', 'wide')),
         user_category = factor(user_category,
                                levels = c('beginner',
                                           'intermediate',
                                           'expert'))) %>%
  ggplot(aes(x = '', y = n, fill = dist_class)) +
  geom_col(width = 0.5, position = 'fill', show.legend = TRUE) +
  facet_grid(~ user_category) +
  scale_fill_brewer(palette = 'Greens', name = 'distribution area') +
  labs(x = '', y = 'Proportion of records', fill = 'distribution') +
  ggpubr::theme_pubclean() +
  theme(legend.position = 'bottom') +
  ggtitle('(a)')

# growth form
plants_plot_growth <- plants %>%
  mutate(user_category = factor(user_category,
                                levels = c('beginner',
                                           'intermediate',
                                           'expert')),
         growth = factor(growth,
                         levels = c('tree', 'shrub',
                                    'vine', 'herb',
                                    'liana', 'subshrub'))) %>%
  count(user_category, growth) %>%
  ggplot(aes(x = "", y = n, fill = growth)) +
  geom_col(width = 0.5, position = "fill", show.legend = TRUE) +
  facet_grid(~ user_category) +
  scale_fill_brewer(palette = 'Greens', name = 'growth form') +
  labs(x = "", y = "", fill = "growth form") +
  ggpubr::theme_pubclean() +
  theme(legend.position = 'bottom') +
  ggtitle('(b)')

# conservation status
plants_plot_status <- plants %>%
  count(user_category, status) %>%
  mutate(status = factor(status,
                         levels = c('NE', 'DD', 'CR', 'EN',
                                    'VU', 'NT', 'LC')),
         user_category = factor(user_category,
                                levels = c('beginner',
                                           'intermediate',
                                           'expert'))) %>%
  ggplot(aes(x = '', y = n, fill = status)) +
  geom_col(width = 0.5, position = 'fill', show.legend = TRUE) +
  facet_grid(~ user_category) +
  scale_fill_brewer(palette = 'Greens', name = 'conservation status') +
  labs(x = '', y = '', fill = 'IUCNglobal') +
  ggpubr::theme_pubclean() +
  theme(legend.position = 'bottom') +
  ggtitle('(c)')

# the three panels side by side (patchwork)
plants_plots <- plants_plot_distribution | plants_plot_growth | plants_plot_status
plants_plotsCode
# ggsave(plants_plots, dpi = 100,
# width = 18, height = 7, scale = 0.8,
# file = 'figs/Figure_5.svg')Code
##### TETRAPODS
# Supplementary figure: distribution of the tetrapod traits, one panel per
# taxon with free axes.

# size plot
supp_size_plot <- ggplot(data = tetrapods, aes(x = size, fill = taxon)) +
  geom_histogram(position = 'identity',
                 bins = 15, show.legend = FALSE) +
  facet_wrap(taxon ~ ., scales = 'free', ncol = 4) +
  scale_fill_brewer(palette = 'Reds', direction = -1) +
  ggpubr::theme_pubclean() +
  labs(x = 'size', y = 'number of records') +
  ggtitle('(a)')
# distribution plot
supp_distribution_plot <- ggplot(data = tetrapods,
                                 aes(x = dist, fill = taxon)) +
  geom_histogram(position = 'identity',
                 bins = 15, show.legend = FALSE) +
  facet_wrap(taxon ~ ., scales = 'free', ncol = 4) +
  scale_fill_brewer(palette = 'Reds', direction = -1) +
  ggpubr::theme_pubclean() +
  labs(x = 'distribution area', y = 'number of records') +
  ggtitle('(b)')
# conservation status plot
# status is categorical, so the records per class are counted with geom_bar().
# geom_histogram(stat = 'count') draws the same bars but makes ggplot2 warn
# about the binning parameters it has to ignore.
supp_status_plot <- ggplot(data = tetrapods, aes(x = status, fill = taxon)) +
  geom_bar(show.legend = FALSE) +
  facet_wrap(taxon ~ ., scales = 'free', ncol = 4) +
  scale_fill_brewer(palette = 'Reds', direction = -1) +
  ggpubr::theme_pubclean() +
  labs(x = 'conservation status', y = 'number of records') +
  ggtitle('(c)')
# the three panels stacked vertically (patchwork)
supp_tetra_plots <- supp_size_plot / supp_distribution_plot / supp_status_plot
supp_tetra_plotsCode
# ggsave(supp_tetra_plots, dpi = 100,
# width = 10, height = 15, scale = 0.8,
# file = 'figs/Figure_S2.svg')
##### PLANTS
# Supplementary figure: distribution of the plant traits, one panel per taxon
# with free axes.

# growth plot
# growth is categorical, so the records per class are counted with geom_bar().
# geom_histogram(stat = 'count') draws the same bars but makes ggplot2 warn
# about the binning parameters it has to ignore.
supp_growth_plot <- ggplot(data = plants, aes(x = growth, fill = taxon)) +
  geom_bar(show.legend = FALSE) +
  facet_wrap(taxon ~ ., scales = 'free', ncol = 4) +
  scale_fill_brewer(palette = 'Greens', direction = -1) +
  ggpubr::theme_pubclean() +
  labs(x = 'growth form', y = 'number of records') +
  # vertical tick labels so the growth-form names do not overlap
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  ggtitle('(a)')
# distribution plot
supp_distribution_plot <- ggplot(data = plants, aes(x = dist, fill = taxon)) +
  geom_histogram(position = 'identity',
                 bins = 15, show.legend = FALSE) +
  facet_wrap(taxon ~ ., scales = 'free', ncol = 4) +
  scale_fill_brewer(palette = 'Greens', direction = -1) +
  ggpubr::theme_pubclean() +
  labs(x = 'distribution area', y = 'number of records') +
  ggtitle('(b)')
# conservation status plot
# categorical as well: counted with geom_bar(), see the growth plot
supp_status_plot <- ggplot(data = plants, aes(x = status, fill = taxon)) +
  geom_bar(show.legend = FALSE) +
  facet_wrap(taxon ~ ., scales = 'free', ncol = 4) +
  scale_fill_brewer(palette = 'Greens', direction = -1) +
  ggpubr::theme_pubclean() +
  labs(x = 'conservation status', y = 'number of records') +
  ggtitle('(c)')
# the three panels stacked vertically (patchwork)
supp_plants_plots <- supp_growth_plot / supp_distribution_plot / supp_status_plot
supp_plants_plotsCode
# ggsave(supp_plants_plots, dpi = 100,
# width = 10, height = 15, scale = 0.8,
# file = 'figs/Figure_S3.svg')Statistical analyses
Tetrapods
Hypothesis 1
Code
# Hypothesis 1 (tetrapods): linear model of ranking on dist_scaled,
# size_scaled and status, fitted on the rows of `tetrapods`.
# status enters as a categorical predictor (one coefficient per level).
mod_tetra <- lm(ranking ~ dist_scaled + size_scaled + status,
data = tetrapods)
# summary(mod_tetra)
car::residualPlots(mod_tetra) Test stat Pr(>|Test stat|)
dist_scaled 3.0475 0.0023099 **
size_scaled -3.3552 0.0007945 ***
status
Tukey test -0.1987 0.8425336
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
plot(predictorEffect(predictor = 'dist_scaled', mod = mod_tetra))Code
plot(predictorEffect(predictor = 'size_scaled', mod = mod_tetra))Hypothesis 2
Code
tapply(tetrapods$dist, INDEX = tetrapods$user_category, mean) expert intermediate beginner
16.77023 17.38902 17.43221
Code
tapply(tetrapods$dist, INDEX = tetrapods$user_category, sd) expert intermediate beginner
4.766938 4.124072 4.101917
Code
# Hypothesis 2 (tetrapods): per-user models of ranking on the per-user
# summaries held in tetrapods_per_user (mean_* and sd_* columns).
mod_tetra_user_mean <- lm(ranking ~ mean_size + mean_dist,
                          data = tetrapods_per_user)
# summary(mod_tetra_user_mean)
mod_tetra_user_sd <- lm(ranking ~ sd_dist + sd_size,
                        data = tetrapods_per_user)
# summary(mod_tetra_user_sd)

# Predictor effects are taken from the fitted models above rather than from
# separately refitted copies, so the plots always show the reported models.
pred_mod_tetra_user_mean <- as.data.frame(
  predictorEffects(mod = mod_tetra_user_mean)
)
pred_mod_tetra_user_sd <- as.data.frame(
  predictorEffects(mod = mod_tetra_user_sd)
)

# Effect of one predictor (fitted line with lower/upper band) drawn over the
# per-user points.
#
#   effect_df:   one element of as.data.frame(predictorEffects(...)), with the
#                columns fit, lower and upper plus the predictor itself
#   users:       per-user data with ranking, user_category and the predictor
#   predictor:   name of the predictor column (string)
#   x_label, y_label: axis titles
#   show_legend: passed to geom_point(); NA is the ggplot2 default
#
# Returns a ggplot object.
plot_tetra_user_effect <- function(effect_df, users, predictor,
                                   x_label, y_label, show_legend = NA) {
  ggplot(data = effect_df, aes(x = .data[[predictor]], y = fit)) +
    geom_point(data = users,
               aes(y = ranking, col = user_category),
               show.legend = show_legend) +
    geom_ribbon(aes(ymin = lower, ymax = upper),
                fill = 'grey40', alpha = 0.5) +
    geom_line(col = 'black', linewidth = 1, linetype = 'dashed') +
    labs(y = y_label, x = x_label, col = '') +
    # ylim(c(0,600))+
    ggpubr::theme_classic2()
}

# Mean plots go on the left of each pair: they carry the y-axis title and no
# legend. SD plots go on the right: no y-axis title, legend shown.
plot_pred_mean_dist <- plot_tetra_user_effect(
  pred_mod_tetra_user_mean$mean_dist, tetrapods_per_user, 'mean_dist',
  x_label = 'mean(distribution) per user', y_label = 'ranking',
  show_legend = FALSE
)
plot_pred_mean_size <- plot_tetra_user_effect(
  pred_mod_tetra_user_mean$mean_size, tetrapods_per_user, 'mean_size',
  x_label = 'mean(size) per user', y_label = 'ranking',
  show_legend = FALSE
)
plot_pred_sd_dist <- plot_tetra_user_effect(
  pred_mod_tetra_user_sd$sd_dist, tetrapods_per_user, 'sd_dist',
  x_label = 'SD(distribution) per user', y_label = ''
)
plot_pred_sd_size <- plot_tetra_user_effect(
  pred_mod_tetra_user_sd$sd_size, tetrapods_per_user, 'sd_size',
  x_label = 'SD(size) per user', y_label = ''
)
plot_pred_mean_size | plot_pred_sd_sizeCode
plot_pred_mean_dist | plot_pred_sd_distModels’ summary
Code
# Regression table for the three tetrapod models, rendered as HTML.
# ci = T reports confidence intervals in place of standard errors;
# digits = 1 rounds the reported figures to one decimal.
stargazer(mod_tetra, mod_tetra_user_mean, mod_tetra_user_sd,
ci = T, digits=1,
type='html',
title = 'tetrapods')| Dependent variable: | |||
| ranking | |||
| (1) | (2) | (3) | |
| dist_scaled | 4.6*** | ||
| (2.7, 6.4) | |||
| size_scaled | 2.5*** | ||
| (0.8, 4.2) | |||
| statusEN | -49.2 | ||
| (-229.6, 131.3) | |||
| statusVU | -25.1 | ||
| (-204.4, 154.2) | |||
| statusNT | -38.1 | ||
| (-217.1, 140.9) | |||
| statusDD | -21.3 | ||
| (-203.5, 161.0) | |||
| statusNE | -10.4 | ||
| (-189.5, 168.6) | |||
| statusLC | -28.6 | ||
| (-207.4, 150.2) | |||
| mean_size | 0.4 | ||
| (-0.3, 1.1) | |||
| mean_dist | 17.0*** | ||
| (5.6, 28.5) | |||
| sd_dist | -24.9*** | ||
| (-34.7, -15.0) | |||
| sd_size | 0.2 | ||
| (-0.3, 0.7) | |||
| Constant | 95.6 | -3.7 | 376.9*** |
| (-83.2, 274.4) | (-213.1, 205.8) | (338.3, 415.5) | |
| Observations | 22,918 | 358 | 358 |
| R2 | 0.003 | 0.02 | 0.1 |
| Adjusted R2 | 0.003 | 0.02 | 0.1 |
| Residual Std. Error | 128.9 (df = 22909) | 239.7 (df = 355) | 234.2 (df = 355) |
| F Statistic | 8.5*** (df = 8; 22909) | 4.3** (df = 2; 355) | 12.9*** (df = 2; 355) |
| Note: | *p<0.1; **p<0.05; ***p<0.01 | ||
Plants
Hypothesis 1
Code
# Hypothesis 1 (plants): linear model of ranking on dist_scaled, growth and
# status, fitted on the rows of `plants`.
# growth and status enter as categorical predictors (one coefficient per
# level).
mod_plants <- lm(ranking ~ dist_scaled + growth + status,
data = plants)
# summary(mod_plants)
car::residualPlots(mod_plants) Test stat Pr(>|Test stat|)
dist_scaled -0.882 0.3778
growth
status
Tukey test -0.601 0.5478
Code
plot(predictorEffect(predictor = 'dist_scaled', mod = mod_plants))Hypothesis 2
Code
tapply(plants$dist, INDEX = plants$user_category, mean) expert intermediate beginner
7.464803 7.402016 7.282389
Code
tapply(plants$dist, INDEX = plants$user_category, sd) expert intermediate beginner
4.769118 4.534960 4.649368
Code
# Hypothesis 2 (plants): per-user models of ranking on the per-user summaries
# held in plants_per_user (mean_dist and sd_dist columns).
mod_plants_user_mean <- lm(ranking ~ mean_dist,
                           data = plants_per_user)
# summary(mod_plants_user_mean)
mod_plants_user_sd <- lm(ranking ~ sd_dist,
                         data = plants_per_user)
# summary(mod_plants_user_sd)

# Predictor effects are taken from the fitted models above rather than from
# separately refitted copies, so the plots always show the reported models.
pred_mod_plants_user_mean <- as.data.frame(
  predictorEffects(mod = mod_plants_user_mean)
)
pred_mod_plants_user_sd <- as.data.frame(
  predictorEffects(mod = mod_plants_user_sd)
)

# Left-hand panel of the pair (mean | SD): it carries the y-axis title, as in
# the tetrapod figure, and no legend.
plot_pred_mean_dist <- ggplot(data = pred_mod_plants_user_mean$mean_dist,
                              aes(x = mean_dist, y = fit)) +
  geom_point(data = plants_per_user,
             aes(y = ranking, x = mean_dist, col = user_category),
             show.legend = FALSE) +
  geom_ribbon(aes(ymin = lower, ymax = upper),
              fill = 'grey40', alpha = 0.5) +
  geom_line(col = 'black', linewidth = 1, linetype = 'dashed') +
  labs(y = 'ranking', x = 'mean(distribution) per user', col = '') +
  # ylim(c(0,600))+
  ggpubr::theme_classic2()

# Right-hand panel: no y-axis title, legend shown.
plot_pred_sd_dist <- ggplot(data = pred_mod_plants_user_sd$sd_dist,
                            aes(x = sd_dist, y = fit)) +
  geom_point(data = plants_per_user,
             aes(y = ranking, x = sd_dist, col = user_category)) +
  geom_ribbon(aes(ymin = lower, ymax = upper),
              fill = 'grey40', alpha = 0.5) +
  geom_line(col = 'black', linewidth = 1, linetype = 'dashed') +
  labs(y = '', x = 'SD(distribution) per user', col = '') +
  # ylim(c(0,600))+
  ggpubr::theme_classic2()
plot_pred_mean_dist | plot_pred_sd_distModels’ summary
Code
# Regression table for the three plant models, rendered as HTML.
# ci = T reports confidence intervals in place of standard errors;
# digits = 1 rounds the reported figures to one decimal.
stargazer(mod_plants,
mod_plants_user_mean,
mod_plants_user_sd,
ci = T, digits=1,
type='html',
title = 'plants')| Dependent variable: | |||
| ranking | |||
| (1) | (2) | (3) | |
| dist_scaled | -2.0 | ||
| (-4.6, 0.6) | |||
| growthvine | -4.2 | ||
| (-28.7, 20.3) | |||
| growthliana | 3.5 | ||
| (-27.0, 34.0) | |||
| growthsubshrub | 4.2 | ||
| (-3.8, 12.3) | |||
| growthshrub | 1.9 | ||
| (-5.0, 8.8) | |||
| growthtree | 12.8** | ||
| (0.6, 25.0) | |||
| statusEN | 29.4 | ||
| (-111.9, 170.6) | |||
| statusVU | 18.4 | ||
| (-113.1, 149.8) | |||
| statusNT | 3.1 | ||
| (-147.4, 153.7) | |||
| statusDD | 147.5 | ||
| (-37.0, 332.0) | |||
| statusNE | 45.2 | ||
| (-85.2, 175.7) | |||
| statusLC | 43.6 | ||
| (-87.1, 174.2) | |||
| mean_dist | -14.6** | ||
| (-28.0, -1.1) | |||
| sd_dist | -23.9** | ||
| (-42.5, -5.4) | |||
| Constant | 33.1 | 380.5*** | 378.0*** |
| (-97.3, 163.5) | (280.1, 480.9) | (294.6, 461.4) | |
| Observations | 10,821 | 291 | 291 |
| R2 | 0.002 | 0.02 | 0.02 |
| Adjusted R2 | 0.001 | 0.01 | 0.02 |
| Residual Std. Error | 133.0 (df = 10808) | 217.0 (df = 289) | 216.3 (df = 289) |
| F Statistic | 2.1** (df = 12; 10808) | 4.5** (df = 1; 289) | 6.4** (df = 1; 289) |
| Note: | *p<0.1; **p<0.05; ***p<0.01 | ||